setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

library(patchwork)
library(tidyverse)
library(broom)
library(stargazer)
library(cowplot)

### Setup:
n = 1000
races = c("White", "Black") # Number of races
compliance = rep(0.63, length(races)) # start with no asymmetric compliance
baserate = 0.15
ate = -0.05


### Run one experiment

one_audit = function(n, races, compliance, callback_rates){
  
  # Make a data frame of compliance by race
  compliance_dat = data.frame(race = races,
                              compliance = compliance)
  
  # Make a data frame of ATE by observed_race
  ate_dat = data.frame(observed_race = races,
                       callback_rate = callback_rates)
  
  #set.seed(12345)
  dat = data.frame( 
    id = 1:n, # respondent ID
    race = sample(races, n, replace=TRUE) # randomizing treatment assignment
  ) %>% left_join(compliance_dat, by = "race")
  
  
  ### Induce noncompliance and identify the observed race
  noncomply_vec = c()
  observed_race_vec = c()
  for(i in 1:nrow(dat)){
    noncomply = rbinom(1,1,prob=1-dat$compliance[i])==1
    if(noncomply){
      observed_race = sample(x = c(setdiff(races, dat$race[i])), size = 1)
    } else(
      observed_race = dat$race[i]
    )
    noncomply_vec = c(noncomply_vec, noncomply)
    observed_race_vec = c(observed_race_vec, observed_race)
  }
  
  dat$noncomply = noncomply_vec
  dat$observed_race = observed_race_vec
  
  ## Sanity check
  #mean(dat$observed_race == dat$race)
  
  ### Calculate the callback rate
  dat = dat %>% left_join(ate_dat, by = "observed_race") %>%
    mutate(callback = sapply(callback_rate, FUN = function(x) rbinom(1, 1, prob = x)))
  
  ## Sanity check
  #dat %>% group_by(callback_rate) %>% summarize(mean(callback))
  dat$race = factor(dat$race, levels = races)
  
  out = broom::tidy(lm(callback ~ race, dat))[-1,]
  return(out)
}


### Four sets of simulations:

## a) Two races, symmetric compliance
## b) Two races, asymmetric compliance
## c) Four races, symmetric compliance
## d) Four races, asymmetric compliance

set.seed(10012)

## (a) Two races, symmetric compliance

params1 = expand.grid(baserate = seq(0.15, 0.5, by=0.05),
                     ate = c(-0.05, -0.1, -0.15),
                     compliance1 = seq(0.1, 1, by=0.025))
params1$compliance2 = params1$compliance1
params1 = params1 %>% filter(baserate + ate > 0)
params1 = params1[rep(1:nrow(params1), each=20),]

sims_a = lapply((1:nrow(params1)),
                FUN = function(x){
                  one_audit(n = 5000, races = c("White", "Black") ,
                            compliance = c(params1$compliance1[x], params1$compliance2[x]),
                            callback_rates = c(params1$baserate[x], params1$baserate[x] + params1$ate[x]))
                })




## (b) Two races, asymmetric compliance

params2 = expand.grid(baserate = seq(0.15, 0.5, by=0.05),
                     ate = c(-0.05, -0.1, -0.15),
                     compliance1 = seq(0.1, 1, by=0.05),
                     compliance2 = seq(0.1, 1, by=0.05))
params2 = params2 %>% filter(baserate + ate > 0)
races = c("White", "Black") 
params2 = params2[rep(1:nrow(params2), each=20),]

sims_b = lapply(1:nrow(params2),
                FUN = function(x){
                  one_audit(n = 5000, races = c("White", "Black") ,
                            compliance = c(params2$compliance1[x], params2$compliance2[x]),
                            callback_rates = c(params2$baserate[x], params2$baserate[x] + params2$ate[x]))
                })




## (c) Four races, symmetric compliance

params3 = expand.grid(baserate = seq(0.15, 0.5, by=0.05),
                     ate1 = c(-0.05, -0.1, -0.15),
                     ate2 = c(-0.05, -0.1, -0.15),
                     ate3 = c(-0.05, -0.1, -0.15),
                     compliance1 = seq(0.1, 1, by=0.1))
params3$compliance4 = params3$compliance3 = params3$compliance2 = params3$compliance1
params3 = params3 %>% filter(baserate + ate1 > 0,
                           baserate + ate2 > 0,
                           baserate + ate3 > 0)
params3 = params3[rep(1:nrow(params3), each=10),]


sims_c = lapply(1:nrow(params3),
                FUN = function(x){
                  one_audit(n = 2000, races = c("White", "Black", "Hispanic", "Asian") ,
                            compliance = c(params3$compliance1[x], params3$compliance2[x],
                                           params3$compliance3[x], params3$compliance4[x]),
                            callback_rates = c(params3$baserate[x], params3$baserate[x] + params3$ate1[x],
                                               params3$baserate[x] + params3$ate2[x],
                                               params3$baserate[x] + params3$ate3[x]))
                })



## (d) Four races, asymmetric compliance
set.seed(12345)

params4 = expand.grid(baserate = seq(0.15, 0.5, by=0.15),
                     ate1 = c(-0.05, -0.1),
                     ate2 = c(-0.05, -0.1),
                     ate3 = c(-0.05, -0.1),
                     compliance1 = c(0.3,.5,.7,.95),
                     compliance2 = c(0.3,.5,.7,.95),
                     compliance3 = c(0.3,.5,.7,.95),
                     compliance4 = c(0.3,.5,.7,.95))
params4 = params4 %>% filter(baserate + ate1 > 0,
                           baserate + ate2 > 0,
                           baserate + ate3 > 0)
params4 = params4[rep(1:nrow(params4), each=10),]

sims_d = lapply(1:nrow(params4),
                FUN = function(x){
                  one_audit(n = 1000, races = c("White", "Black", "Hispanic", "Asian"),
                            compliance = c(params4$compliance1[x], params4$compliance2[x],
                                           params4$compliance3[x], params4$compliance4[x]),
                            callback_rates = c(params4$baserate[x], params4$baserate[x] + params4$ate1[x],
                                               params4$baserate[x] + params4$ate2[x],
                                               params4$baserate[x] + params4$ate3[x]))
                })



#### Wrap it all up and save the simulation results
save(sims_a, sims_b, sims_c, sims_d,
     params1, params2, params3, params4, file="compliance_simulations.RData")


#### Load and collapse
load("compliance_simulations.RData")


dat1 = do.call(rbind, sims_a) %>% cbind(params1)
dat2 = do.call(rbind, sims_b) %>% cbind(params2)
dat3 = do.call(rbind, sims_c) %>% cbind(params3[rep(1:nrow(params3), each=3),])
dat4 = do.call(rbind, sims_d) %>% cbind(params4[rep(1:nrow(params4), each=3),])
gdata::keep(dat1,dat2,dat3,dat4, sure=T)



### First, plot the two-race, asymmetric-compliance case

plot_df3 = dat2 %>% group_by(ate, compliance1, compliance2) %>% 
  summarize(avg_rmse = sqrt(mean((ate-estimate)^2)),
            mae = mean(abs(ate-estimate)),
            bias = mean(estimate) - mean(ate),
            sigpct = mean(p.value < 0.05))

p3 = ggplot(plot_df3 %>%
              mutate(yval = bias,
                     ATE = factor(ate),
                     compliance1 = as.numeric(compliance1),
                     compliance2 = as.numeric(compliance2)) %>%
              filter(compliance2=="0.95"),
            aes(x = compliance1, y = yval)) +
  ylim(c(0,.15)) +
  geom_vline(xintercept = 0.95, col = "black",
             linetype = "dashed", linewidth=1.25) + 
  geom_line(aes(linetype = ATE)) + 
  xlab("Compliance Rate for Black Profiles") + 
  ylab("Bias") + 
  theme_bw() + 
  annotate("text", x = 0.575, y = 0.145, label="Pictures, names, and resumés") +
  theme(legend.position = "n") + 
  ggtitle("White Profile Compliance: 95%")
p3


plot_df4 = dat2 %>% group_by(ate, compliance1, compliance2) %>% 
  summarize(avg_rmse = sqrt(mean((ate-estimate)^2)),
            mae = mean(abs(ate-estimate)),
            bias = mean(estimate) - mean(ate),
            sigpct = mean(p.value < 0.05))

p4 = ggplot(plot_df4 %>% mutate(yval = bias,
                                ATE = factor(ate)) %>%
              filter(compliance2==0.75),
            aes(x = compliance1, y = yval)) +
  geom_vline(xintercept = 0.4, col = "black",
             linetype = "dashed", lwd=1.25) + 
  ylim(c(0,.15)) +
  geom_line(aes(lty = ATE)) + 
  xlab("Compliance Rate for Black Profiles") + 
  ylab("") +
  theme_bw() + 
  annotate("text", x = 0.55, y = 0.145, label="Names only") + 
  ggtitle("White Profile Compliance: 75%")
p4

fig2_v2 = plot_grid(p3, p4, rel_widths = c(0.43,0.57))

save_plot(fig2_v2, file="compliance_simulations_v2.pdf",
          base_height=5)


### Next, plot the two-race, asymmetric-compliance case p-vals

p3b = ggplot(plot_df3 %>%
              mutate(yval = sigpct,
                     ATE = factor(ate),
                     compliance1 = as.numeric(compliance1),
                     compliance2 = as.numeric(compliance2)) %>%
              filter(compliance2=="0.95"),
            aes(x = compliance1, y = yval)) +
#  ylim(c(0,.15)) +
  geom_vline(xintercept = 0.95, col = "black",
             linetype = "dashed", linewidth=1.25) + 
  geom_line(aes(linetype = ATE)) + 
  xlab("Compliance Rate for Black Profiles") + 
  ylab("Proportion of Significant Effects (p<0.05)") + 
  theme_bw() + 
  annotate("text", x = 0.575, y = 0.145, label="Pictures, names, and resumés") +
  theme(legend.position = "n") + 
  ggtitle("White Profile Compliance: 95%")
p3b

p4b = ggplot(plot_df4 %>% mutate(yval = sigpct,
                                ATE = factor(ate)) %>%
              filter(compliance2==0.75),
            aes(x = compliance1, y = yval)) +
  geom_vline(xintercept = 0.4, col = "black",
             linetype = "dashed", lwd=1.25) + 
  geom_line(aes(lty = ATE)) + 
  xlab("Compliance Rate for Black Profiles") + 
  ylab("") +
  theme_bw() + 
  annotate("text", x = 0.55, y = 0.145, label="Names only") + 
  ggtitle("White Profile Compliance: 75%")
p4b

fig2b_v2 = plot_grid(p3b, p4b, rel_widths = c(0.43,0.57))

save_plot(fig2b_v2, file="compliance_simulations_v2_pvals.pdf",
          base_height=5)









#### Four-race plots
load("compliance_simulations.RData")
dat4 = do.call(rbind, sims_d) %>% cbind(params4[rep(1:nrow(params4), each=3),])
gdata::keep(dat4, sure=T)


dat4$ate = NA
dat4$ate[dat4$term == "raceBlack"] = dat4$ate1[dat4$term == "raceBlack"]
dat4$ate[dat4$term == "raceHispanic"] = dat4$ate2[dat4$term == "raceHispanic"]
dat4$ate[dat4$term == "raceAsian"] = dat4$ate3[dat4$term == "raceAsian"]
dat4$Race = NA
dat4$Race[dat4$term == "raceBlack"] = "Black"
dat4$Race[dat4$term == "raceHispanic"] = "Hispanic"
dat4$Race[dat4$term == "raceAsian"] = "Asian"

## Name only
plotdf_a1 = dat4 %>%
  group_by(compliance1, compliance2, compliance3, compliance4, baserate, Race) %>% 
  summarize(avg_rmse = sqrt(mean((ate-estimate)^2)),
            mae = mean(abs(ate-estimate)),
            bias = mean(estimate) - mean(ate))

p1a = ggplot(plotdf_a1 %>%
              mutate(yval = bias,
                     compliance1 = as.numeric(compliance1),
                     compliance2 = as.numeric(compliance2)) %>%
              filter(compliance1=="0.7",
                     compliance2== "0.3",
                     compliance3== "0.3",
                     compliance4== "0.3"),
            aes(x = Race, y = yval)) +
  ylim(c(0,0.12)) +
  geom_bar(stat = "identity") + 
  xlab("Race") + 
  ylab("Bias") + 
  theme_bw() + 
  theme(legend.position = "n") + 
  ggtitle("Name only")
p1a



## Picture only
p1b = ggplot(plotdf_a1 %>%
               mutate(yval = bias,
                      compliance1 = as.numeric(compliance1),
                      compliance2 = as.numeric(compliance2)) %>%
               filter(compliance1=="0.7",
                      compliance2== "0.7",
                      compliance3== "0.5",
                      compliance4== "0.7"),
             aes(x = Race, y = yval)) +
  ylim(c(0,0.12)) +
  geom_bar(stat = "identity") + 
  xlab("Race") + 
  ylab("Bias") + 
  theme_bw() + 
  theme(legend.position = "n") + 
  ggtitle("Picture only")
p1b

## Picture and name
p1c = ggplot(plotdf_a1 %>%
               mutate(yval = bias,
                      compliance1 = as.numeric(compliance1),
                      compliance2 = as.numeric(compliance2)) %>%
               filter(compliance1=="0.95",
                      compliance2== "0.95",
                      compliance3== "0.7",
                      compliance4== "0.95"),
             aes(x = Race, y = yval)) +
  ylim(c(0,0.12)) +
  geom_bar(stat = "identity") + 
  xlab("Race") + 
  ylab("Bias") + 
  theme_bw() + 
  theme(legend.position = "n") + 
  ggtitle("Picture and name")
p1c

## Picture, name, resume
p1d = ggplot(plotdf_a1 %>%
               mutate(yval = bias,
                      compliance1 = as.numeric(compliance1),
                      compliance2 = as.numeric(compliance2)) %>%
               filter(compliance1=="0.95",
                      compliance2== "0.95",
                      compliance3== "0.95",
                      compliance4== "0.95"),
             aes(x = Race, y = yval)) +
  ylim(c(0,0.12)) +
  geom_bar(stat = "identity") + 
  xlab("Race") + 
  ylab("Bias") + 
  theme_bw() + 
  theme(legend.position = "n") + 
  ggtitle("Picture, name, and resumé")
p1d



fig_a2 = cowplot::plot_grid(p1a,p1b,p1c,p1d, nrow=2)
save_plot(fig_a2, file="compliance_simulations_4way.pdf",
          base_height=5)
